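# Group the items of each user found in a tab-separated interaction file, then
# append randomly sampled "<id>\t<item>\t1" rows to tendency.txt.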

from tqdm import tqdm
import random

path = "F:\\LCW\\PycharmProject\\pytorch-learn\\RecSys\\dataset\\patent-100k-new\\原patent-100k-new.txt"


def load_user_items(source_path, progress=None):
    """Group the items of every user found in a tab-separated file.

    Each line is expected to hold at least two tab-separated fields; the
    first is used as the user and the second as the item. Any further
    fields are ignored. Lines with fewer than two fields (blank lines, for
    example) are skipped instead of raising IndexError.

    Args:
        source_path: Path of the text file to read.
        progress: Optional callable that wraps the list of lines and returns
            an iterable over them (``tqdm`` fits). ``None`` iterates the
            lines directly.

    Returns:
        A dict mapping each user to the list of its items, in file order.
        Repeated (user, item) lines are kept as repeated list entries.
    """
    with open(source_path, 'r') as f:
        lines = f.readlines()
    # Wrap the materialized list (not the file) so a progress bar knows the
    # total number of lines.
    if progress is not None:
        lines = progress(lines)

    user_item = {}
    for line in lines:
        # Strip the line terminator first: when a line has exactly two
        # fields the item would otherwise end with '\n' and break the
        # output rows written later.
        fields = line.rstrip('\r\n').split('\t')
        if len(fields) < 2:
            continue
        user_item.setdefault(fields[0], []).append(fields[1])
    return user_item


def write_tendency_samples(user_item, out, group_count=800,
                           users_per_group=2, items_per_user=12):
    """Write randomly sampled rows of the form ``<id>\\t<item>\\t1``.

    For every id from 1 to ``group_count``, ``users_per_group`` distinct
    users are drawn at random, and for each of them ``items_per_user``
    entries of that user's item list are drawn at random (by position, so
    an item listed twice may be drawn twice). One row per drawn item is
    written. The drawn users and items are also printed, as a trace.

    Only users holding at least ``items_per_user`` items are candidates;
    sampling from a shorter list would raise ValueError in the middle of
    the run and leave a partially written file.

    Args:
        user_item: Mapping from user to a list of that user's items.
        out: Writable text stream (anything with ``writelines``).
        group_count: Number of ids to generate, numbered from 1.
        users_per_group: Users drawn for each id.
        items_per_user: Items drawn for each selected user.

    Raises:
        ValueError: If fewer than ``users_per_group`` users have at least
            ``items_per_user`` items.
    """
    # random.sample requires a sequence: passing a set was deprecated in
    # Python 3.9 and raises TypeError since 3.11. A list in dict order also
    # keeps seeded runs reproducible.
    eligible_users = [
        user for user, items in user_item.items()
        if len(items) >= items_per_user
    ]
    if len(eligible_users) < users_per_group:
        raise ValueError(
            'need at least %d users with %d or more items, found %d'
            % (users_per_group, items_per_user, len(eligible_users))
        )

    for group_id in range(1, group_count + 1):
        chosen_users = random.sample(eligible_users, users_per_group)
        print(chosen_users)
        for user in chosen_users:
            chosen_items = random.sample(user_item[user], items_per_user)
            print(chosen_items)
            out.writelines(
                str(group_id) + '\t' + str(item) + '\t' + '1' + '\n'
                for item in chosen_items
            )


def main():
    """Load the interaction file at ``path`` and append samples to tendency.txt."""
    user_item = load_user_items(path, progress=tqdm)
    print(user_item)
    # NOTE(review): append mode means a second run adds another batch of
    # rows whose ids start again at 1 -- confirm that this is intended.
    with open('tendency.txt', 'a') as w:
        write_tendency_samples(user_item, w)


if __name__ == "__main__":
    main()



